Analysis of 5 smFRET samples

In this notebook:

Per-sample FRET histograms
Collapsed (merged channels) FRET histograms
FRET vs distance plot (multi-spot vs usALEX)
D-only fraction plots
Burst size vs measurement (corrected and uncorrected)
Background (per CH) vs measuremets
Burst-per-second (per CH) vs measuremet
Burst accumulation vs time (per CH and all-CH)

USAGE TIP: to comment-out a code cell, select all (CTRL+a) and hit CTRL+/.

FRET fitting remarks

In this notebook we fit the Proximity Ratio histogram using different models.

Load FRETBursts software



In [ ]:

    
from fretbursts import *
sns = init_notebook()



In [ ]:

    
import os
import pandas as pd
from IPython.display import display
from IPython.display import display, Math



In [ ]:

    
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed



In [ ]:

    
import lmfit
print('lmfit version:', lmfit.__version__)

8-spot paper plot style



In [ ]:

    
PLOT_DIR = './figure/'



In [ ]:

    
import matplotlib as mpl
from cycler import cycler

bmap = sns.color_palette("Set1", 9)
colors = np.array(bmap)[(1,0,2,3,4,8,6,7), :]
mpl.rcParams['axes.prop_cycle'] = cycler('color', colors)
colors_labels = ['blue', 'red', 'green', 'violet', 'orange', 'gray', 'brown', 'pink', ]
for c, cl in zip(colors, colors_labels):
    locals()[cl] = tuple(c) # assign variables with color names
sns.palplot(colors)

Data files

Data folder:



In [ ]:

    
data_dir = './data/multispot/'

Check that the folder exists:



In [ ]:

    
data_dir = os.path.abspath(data_dir) + '/'
assert os.path.exists(data_dir), "Path '%s' does not exist." % data_dir

List of data files in data_dir:



In [ ]:

    
from glob import glob
file_list = sorted(glob(data_dir + '*.hdf5'))



In [ ]:

    
labels = ['7d', '12d', '17d', '22d', '27d', 'DO']
files_dict = {lab: fname for lab, fname in zip(sorted(labels), file_list)}
files_dict

Parameters from file

Load the leakage coefficient from disk (computed in Multi-spot 5-Samples analyis - Leakage coefficient fit):



In [ ]:

    
leakage_coeff_fname = 'results/Multi-spot - leakage coefficient KDE wmean DexDem.csv'
leakage = np.loadtxt(leakage_coeff_fname, ndmin=1)

print('Leakage coefficient:', leakage)

Load the direct excitation coefficient ($d_{dirT}$) from disk (computed in usALEX - Corrections - Direct excitation physical parameter):



In [ ]:

    
dir_ex_coeff_fname = 'results/usALEX - direct excitation coefficient dir_ex_t beta.csv'
dir_ex_t = np.loadtxt(dir_ex_coeff_fname, ndmin=1)

print('Direct excitation coefficient (dir_ex_t):', dir_ex_t)

Parameters

Analysis parameters:



In [ ]:

    
gamma_sel = 0.44     # Used to compute burst size during burst selection
donor_ref = False    # False -> gamma correction is: g*nd + na
                     # True  -> gamma correction is: nd + na/g

hist_weights = 'size'

## Background fit parameters
bg_kwargs_auto = dict(fun=bg.exp_fit,
                 time_s = 30,
                 tail_min_us = 'auto',
                 F_bg=1.7,
                 )

## Burst search
F=6
dither = False
size_th = 30    # Burst size threshold (selection on corrected burst sizes)

## FRET fit parameters
bandwidth = 0.03        # KDE bandwidth
E_range = {'7d':  (0.7, 1.0), '12d': (0.4, 0.8), '17d': (0.2, 0.4), 
           '22d': (0.0, 0.1), '27d': (0.0, 0.1), 'DO': (0.0, 0.1)}
E_axis_kde = np.arange(-0.2, 1.2, 0.0002)

Processing and plot options:



In [ ]:

    
# Data load options
reload_data = 1
burst_search = 1
delete_ph_times = 0

# Plot output options save_figure = 0 nosuptitle = False plt.rc('savefig', dpi=75) # Changes the figure size in the notebook savefig_kwargs = dict(dpi=200, bbox_inches='tight') # default save-figure options fret_plot_kw = dict(bins = np.r_[-0.2:1.201:bandwidth], fit_color='k', fit_alpha=0.9, fit_lw=2, #fit_fillcolor='#888888', sharey = False)

Utility functions



In [ ]:

    
def print_fit_report(E_pr, gamma=1, leakage=0, dir_ex_t=0, math=True):
    """Print fit and standard deviation for both corrected and uncorrected E
    Returns d.E_fit.
    """
    E_corr = fretmath.correct_E_gamma_leak_dir(E_pr, gamma=gamma, leakage=leakage, dir_ex_t=dir_ex_t)
    
    E_pr_mean = E_pr.mean()*100
    E_pr_delta = (E_pr.max() - E_pr.min())*100
    
    E_corr_mean = E_corr.mean()*100
    E_corr_delta = (E_corr.max() - E_corr.min())*100
    if math:
        display(Math(r'\text{Pre}\;\gamma\quad\langle{E}_{fit}\rangle = %.1f\%% \qquad'
                     '\Delta E_{fit} = %.2f \%%' % \
                     (E_pr_mean, E_pr_delta)))
        display(Math(r'\text{Post}\;\gamma\quad\langle{E}_{fit}\rangle = %.1f\%% \qquad'
                     '\Delta E_{fit} = %.2f \%%' % \
                     (E_corr_mean, E_corr_delta)))
    else:
        print('Pre-gamma  E (delta, mean):  %.2f  %.2f' % (E_pr_mean, E_pr_delta))
        print('Post-gamma E (delta, mean):  %.2f  %.2f' % (E_corr_mean, E_corr_delta))

5-samples processing



In [ ]:

    
df = pd.DataFrame(index=['7d', '12d', '17d', '22d', '27d'], columns=range(8), dtype=float)
df.index.name = 'Sample'
df.columns.name = 'Channel'

E_pr_fret = df.copy()
E_pr_fret_sig = df.copy()
nbursts = df.copy()

7bp sample



In [ ]:

    
data_id = '7d'
if reload_data:
    d7 = loader.photon_hdf5(files_dict[data_id])
    d7.calc_bg(**bg_kwargs_auto)
if burst_search:
    d7.burst_search(m=10, F=F, dither=dither)
    if delete_ph_times: d7.delete('ph_times_m')



In [ ]:

    
gamma_sel, donor_ref



In [ ]:

    
dfs7 = Sel(d7, select_bursts.size, th1=30, gamma=gamma_sel, donor_ref=donor_ref)
dx = dfs7



In [ ]:

    
nbursts.loc[data_id] = dx.num_bursts



In [ ]:

    
fitter = bext.bursts_fitter(dx)
fitter.histogram(bins=np.r_[-0.2 : 1.2 : bandwidth])
fitter.model = mfit.factory_two_gaussians(add_bridge=True, p2_center=0.8)
fitter.fit_histogram()



In [ ]:

    
E_pr_fret.loc[data_id] = fitter.params['p2_center']
display(E_pr_fret.loc[[data_id]])
print_fit_report(E_pr_fret.loc[data_id], gamma=0.42, leakage=leakage, dir_ex_t=dir_ex_t)



In [ ]:

    
dplot(dx, hist_fret, show_model=True, 
      show_fit_stats=True, fit_from='p2_center', show_fit_value=True);



In [ ]:

    
fig, axes = plt.subplots(4, 2, figsize=(14, 12), sharex=True, sharey=True)
fig.subplots_adjust(left=0.08, right=0.96, top=0.93, bottom=0.07,
                    wspace=0.06, hspace=0.15)

for ich, ax in enumerate(axes.ravel()):
    mfit.plot_mfit(fitter, ich=ich, ax=ax)

Compare the effect of burst-size weights



In [ ]:

    
# bext.bursts_fitter(dx, weights=None)
# dx.E_fitter.fit_histogram(mfit.factory_two_gaussians())
# dplot(dx, hist_fret, weights=None, show_model=True, show_fit_stats=True, fit_from='p2_center');
# ylim(0, 6)
# fig_no_w = gcf()
# plt.close(fig_no_w)

# bext.bursts_fitter(dx, weights='size', gamma=0.43)
# dx.E_fitter.fit_histogram(mfit.factory_two_gaussians())
# dplot(dx, hist_fret, weights='size', gamma=0.43,
#       show_model=True, show_fit_stats=True, fit_from='p2_center');
# fig_w = gcf()
# plt.close(fig_w)

# def _plot(weights=False):
#     if weights:
#         display(fig_w)
#     else:
#         display(fig_no_w)

# interact(_plot, weights=False);

12bp sample



In [ ]:

    
data_id = '12d'
if reload_data: 
    d12 = loader.photon_hdf5(files_dict[data_id])
    d12.calc_bg_cache(**bg_kwargs_auto)
if burst_search:
    d12.burst_search(m=10, F=F, dither=dither)
    if delete_ph_times: d12.delete('ph_times_m')



In [ ]:

    
dfs12 = Sel(d12, select_bursts.size, th1=30, gamma=gamma_sel, donor_ref=donor_ref)
dx = dfs12



In [ ]:

    
nbursts.loc[data_id] = dx.num_bursts



In [ ]:

    
fitter = bext.bursts_fitter(dx)
fitter.histogram(bins=np.r_[-0.2 : 1.2 : bandwidth])
fitter.model = mfit.factory_two_gaussians(add_bridge=True, p2_center=0.65)
fitter.fit_histogram()
E_pr_fret.loc[data_id] = fitter.params['p2_center']
display(E_pr_fret.loc[[data_id]])
print_fit_report(E_pr_fret.loc[data_id], gamma=0.42, leakage=leakage, dir_ex_t=dir_ex_t)



In [ ]:

    
dplot(dx, hist_fret, show_model=True, 
      show_fit_stats=True, fit_from='p2_center', show_fit_value=True);

17bp sample



In [ ]:

    
data_id = '17d'
if reload_data:
    d17 = loader.photon_hdf5(files_dict[data_id])
    d17.calc_bg_cache(**bg_kwargs_auto)
if burst_search:
    d17.burst_search(m=10, F=F, dither=dither)
    if delete_ph_times: d17.delete('ph_times_m')



In [ ]:

    
dfs17 = Sel(d17, select_bursts.size, th1=30, gamma=gamma_sel, donor_ref=donor_ref)
dx = dfs17



In [ ]:

    
nbursts.loc[data_id] = dx.num_bursts



In [ ]:

    
fitter = bext.bursts_fitter(dx)
fitter.histogram(bins=np.r_[-0.2 : 1.2 : bandwidth])
fitter.model = mfit.factory_two_gaussians(add_bridge=False, p2_center=0.4)
fitter.fit_histogram()
E_pr_fret.loc[data_id] = fitter.params['p2_center']
display(E_pr_fret.loc[[data_id]])
print_fit_report(E_pr_fret.loc[data_id], gamma=0.42, leakage=leakage, dir_ex_t=dir_ex_t)



In [ ]:

    
dplot(dx, hist_fret, show_model=True, 
      show_fit_stats=True, fit_from='p2_center', show_fit_value=True);

22bp sample



In [ ]:

    
data_id = '22d'
if reload_data:
    d22 = loader.photon_hdf5(files_dict[data_id])
    d22.calc_bg_cache(**bg_kwargs_auto)
if burst_search:
    d22.burst_search(m=10, F=F, dither=dither)
    if delete_ph_times: d22.delete('ph_times_m')



In [ ]:

    
dfs22 = Sel(d22, select_bursts.size, th1=30, gamma=gamma_sel, donor_ref=donor_ref)
dx = dfs22



In [ ]:

    
nbursts.loc[data_id] = dx.num_bursts



In [ ]:

    
fitter = bext.bursts_fitter(dx)
fitter.histogram(bins=np.r_[-0.2 : 1.2 : bandwidth])
fitter.model = mfit.factory_gaussian()
fitter.fit_histogram()
E_pr_fret.loc[data_id] = fitter.params['center']
display(E_pr_fret.loc[[data_id]])
print_fit_report(E_pr_fret.loc[data_id], gamma=0.42, leakage=leakage, dir_ex_t=dir_ex_t)



In [ ]:

    
dplot(dx, hist_fret, show_model=True, 
      show_fit_stats=True, fit_from='center', show_fit_value=True);

27bp sample



In [ ]:

    
data_id = '27d'
if reload_data:
    d27 = loader.photon_hdf5(files_dict[data_id])
    d27.calc_bg_cache(**bg_kwargs_auto)
if burst_search:
    d27.burst_search(m=10, F=F, dither=dither)#, ph_sel=Ph_sel(Dex='Dem'))
    if delete_ph_times: d27.delete('ph_times_m')



In [ ]:

    
dfs27 = Sel(d27, select_bursts.size, th1=30, gamma=gamma_sel, donor_ref=donor_ref)
dx = dfs27



In [ ]:

    
nbursts.loc[data_id] = dx.num_bursts



In [ ]:

    
fitter = bext.bursts_fitter(dx)
fitter.histogram(bins=np.r_[-0.2 : 1.2 : bandwidth])
fitter.model = mfit.factory_asym_gaussian()
fitter.fit_histogram()
E_pr_fret.loc[data_id] = fitter.params['center']
display(E_pr_fret.loc[[data_id]])
print_fit_report(E_pr_fret.loc[data_id], gamma=0.42, leakage=leakage, dir_ex_t=dir_ex_t)



In [ ]:

    
dplot(dx, hist_fret, show_model=True, 
      show_fit_stats=True, fit_from='center', show_fit_value=True);



In [ ]:

    
fitter = bext.bursts_fitter(dx)
fitter.histogram(bins=np.r_[-0.2 : 1.2 : bandwidth])
fitter.model = mfit.factory_gaussian()
fitter.fit_histogram()
E_pr_fret.loc[data_id] = fitter.params['center']
display(E_pr_fret.loc[[data_id]])
print_fit_report(E_pr_fret.loc[data_id], gamma=0.42, leakage=leakage, dir_ex_t=dir_ex_t)



In [ ]:

    
dplot(dx, hist_fret, show_model=True, 
      show_fit_stats=True, fit_from='center', show_fit_value=True);

5-samples: global analysis

Proximity ratios fitted from multispot data:



In [ ]:

    
E_pr_fret = E_pr_fret.round(6)
E_pr_fret



In [ ]:

    
nbursts = nbursts.astype(int)
nbursts



In [ ]:

    
E_pr_fret.to_csv('results/Multi-spot - dsDNA - PR - all_samples all_ch.csv')
nbursts.to_csv('results/Multi-spot - dsDNA - nbursts - all_samples all_ch.csv')



In [ ]:

    
norm = (E_pr_fret.T - E_pr_fret.mean(1))#/E_pr_fret.mean(1)
norm_rel = (E_pr_fret.T - E_pr_fret.mean(1))/E_pr_fret.mean(1)
norm.plot()
norm_rel.plot()



In [ ]:

    
mch_plot_bg(dx)

NOTE: The 27d and DO samples have a trend that correlates with the A-ch background. For these samples could be beneficial to use a D-only burst search.

NOTE 2: Like observed during the leakage fit, even a D-only burst search results in +2% offset (DO) in CH1, this cannot be correlation with the A-background and must be slightly different gamma in the spot.

Corrected $E$ from μs-ALEX data:



In [ ]:

    
data_file = 'results/usALEX-5samples-E-corrected-all-ph.csv'
data_alex = pd.read_csv(data_file).set_index('sample')#[['E_pr_fret_kde']]
data_alex.round(6)



In [ ]:

    
E_alex = data_alex.E_gauss_w
E_alex

Merging the channels

Merge the data of the different channels:



In [ ]:

    
dfs7c = dfs7.collapse()
dfs12c = dfs12.collapse()
dfs17c = dfs17.collapse()
dfs22c = dfs22.collapse()
dfs27c = dfs27.collapse()

Define samples lists

Define list of results and labels:



In [ ]:

    
d_samples = [dfs7, dfs12, dfs17, dfs22, dfs27]#, dfso]
d_samples_c = [dfs7c, dfs12c, dfs17c, dfs22c, dfs27c ]
d_labels = ['7d', '12d', '17d', '22d', '27d']#, 'DO']
CH = np.arange(8)
CH_labels = ['CH%d' % i for i in CH]
dist_s_bp = [7, 12, 17, 22, 27]

Print a summary of current processed data:



In [ ]:

    
def print_params(d_samples, d_labels, status=False):
    print('Sample              Model             Ph_sel')
    for dx, name in zip(d_samples, d_labels):
        print("%3s %25s %35s" % (name, dx.E_fitter.model.name, dx.ph_sel))
    if status:
        print()
        for dx, name in zip(d_samples, d_labels):
            print(dx.status())



In [ ]:

    
print_params(d_samples, d_labels, 1)

Plot FRET vs distance

fontsize = 12 text_pos = {'7d': (10.5, 0.92), '12d': (15.5, 0.74), '17d': (16, 0.43), '22d': (21, 0.18), '27d': (29, 0.15)} text_kwargs = dict(ha='right', va='center', bbox=dict(facecolor='white', edgecolor='white'), zorder=2, fontsize=fontsize-1, ) dfun = lambda E_fit: 100*(E_fit.max()-E_fit.min()) fig, ax = plt.subplots() ax.plot(dist_s_bp, E_fret_mch, '+', lw=2, mew=1.2, ms=8, zorder=4) ax.plot(dist_s_bp, E_alex, '-', lw=3, mew=0, alpha=0.5, color='r', zorder=3) plt.title('Multi-spot smFRET dsDNA, Gamma = %.2f' % multispot_gamma) plt.xlabel('Distance in base-pairs', fontsize=fontsize); plt.ylabel('E', fontsize=fontsize) plt.ylim(0, 1); plt.xlim(0, 30) plt.grid(True) plt.legend(['CH1','CH2','CH3','CH4','CH5','CH6','CH7','CH8', u'μsALEX'], fancybox=True, prop={'size':fontsize-1}, #loc='upper right', bbox_to_anchor=(1.05, 1)) loc='best') if deviance_plot: for sample in E_fret_mch.index: delta = dfun(E_fret_mch.loc[sample]) text_kwargs.update( s="%.1f%%" % delta, #s="$\Delta = %.1f%%$" % delta, x=text_pos[sample][0], y=text_pos[sample][1], ) ax.text(**text_kwargs) #ax.set_axisbelow(True) #if save_figure: plt.savefig("FRET vs distance - %s SPW F%d.png" % (fret_gen_fit_func.__name__, F)) #plt.savefig(PLOT_DIR+"FRET vs distance - Gpost_th30.png", dpi=200, bbox_inches='tight')